/*
 * Fast copy routine.  Derived from aligned_block_copy.
 *
 *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
 *
 *	a0 	src address
 *	a1	dst address
 *	a2	length
 *
 * Author:	Chris Maeda
 * Date:	June 1993
 *
 * Mach Operating System
 * Copyright (c) 1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * Build-time configuration.
 *
 *   MEMCOPY defined   the routine is assembled as memcpy  (dst in $a0,
 *                     src in $a1)
 *   MEMMOVE defined   the routine is assembled as memmove (dst in $a0,
 *                     src in $a1)
 *   neither defined   the routine is assembled as bcopy   (src in $a0,
 *                     dst in $a1)
 *
 * In every variant the byte count is taken from $a2.
 */
#if defined(MEMCOPY) || defined(MEMMOVE)
#   ifdef MEMCOPY
#      define FUNCTION	memcpy
#   else
#      define FUNCTION	memmove
#   endif
#   define SRCREG	$a1
#   define DSTREG	$a0
#else
#   define FUNCTION	bcopy
#   define SRCREG	$a0
#   define DSTREG	$a1
#endif

#define	SIZEREG		$a2

/*
 * LWHI/LWLO are the two halves of an unaligned word load; the copy code
 * issues LWHI at the lowest byte address of the word and LWLO at the
 * highest.  Which of lwl/lwr belongs at which end depends on the byte
 * order of the target.  Nothing else in this file defines them, so
 * supply a fallback keyed on the compiler byte-order macros.  The
 * fallback is skipped entirely when the build already provides either
 * name, and defines nothing when the byte order cannot be determined.
 */
#if !defined(LWHI) && !defined(LWLO)
#   if defined(__MIPSEB__) || defined(_MIPSEB)
#      define LWHI	lwl
#      define LWLO	lwr
#   elif defined(__MIPSEL__) || defined(_MIPSEL)
#      define LWHI	lwr
#      define LWLO	lwl
#   endif
#endif

	.globl  FUNCTION
	.type	FUNCTION, @function
/*
 * Entry point and forward (ascending address) copy.
 *
 * In:      SRCREG  = source address
 *          DSTREG  = destination address
 *          SIZEREG = number of bytes to copy (unsigned)
 * Out:     memcpy/memmove builds return the original destination in $v0.
 * Scratch: $at, $v1, $a3 and $t0-$t3 are overwritten; SRCREG, DSTREG and
 *          SIZEREG are consumed.
 *
 * The code is assembled with .set noreorder: the instruction that follows
 * each branch or jump is its delay slot and is executed whether or not
 * the branch is taken.  Several delay slots below deliberately hold the
 * first instruction of the fall-through path.
 */
FUNCTION:
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	$v0,DSTREG
#endif
	/*
	 * Make sure we can copy forwards.  When the source starts below the
	 * destination the copy is handed to the backward path at label 6.
	 * The andi that follows the branch is executed in its delay slot;
	 * the backward path recomputes $t1, so that is harmless.
	 */
	sltu	$t0,SRCREG,DSTREG	# t0 = (src < dst)
	bne	$t0,$zero,6f		# yes: copy backwards

	/*
	 * There are four alignment cases (with frequency)
	 * (Based on measurements taken with a DECstation 5000/200
	 * inside a Mach kernel.)
	 *
	 * aligned   -> aligned		(mostly)
	 * unaligned -> aligned		(sometimes)
	 * aligned,unaligned -> unaligned	(almost never)
	 *
	 * An unaligned destination is always copied a byte at a time.
	 *
	 * Note that we could add another case that checks if
	 * the destination and source are unaligned but the
	 * copy is alignable.  eg if src and dest are both
	 * on a halfword boundary.
	 */
	andi	$t1,DSTREG,3		# t1 = low two bits of dest
	bne	$t1,$zero,3f		# dest not word aligned: byte copy
	andi	$t0,SRCREG,3		# (delay slot) t0 = low two bits of src
	bne	$t0,$zero,5f		# src not word aligned

	/*
	 * Forward aligned->aligned copy, 8*4 bytes at a time.
	 * The li below doubles as the delay slot of the branch above.
	 */
	li	$at,-32
	and	$t0,SIZEREG,$at		# count truncated to multiple of 32
	addu	$a3,SRCREG,$t0		# run fast loop up to this address
	sltu	$at,SRCREG,$a3		# any work to do?
	beq	$at,$zero,2f
	subu	SIZEREG,$t0		# (delay slot) bytes left after fast loop

	/*
	 * loop body: each pass moves 32 bytes as two groups of four words.
	 * The pointers are bumped in the middle, so the second group is
	 * addressed with negative offsets.
	 */
1:	# cp
	lw	$t3,0(SRCREG)
	lw	$v1,4(SRCREG)
	lw	$t0,8(SRCREG)
	lw	$t1,12(SRCREG)
	addiu	SRCREG,32
	sw	$t3,0(DSTREG)
	sw	$v1,4(DSTREG)
	sw	$t0,8(DSTREG)
	sw	$t1,12(DSTREG)
	lw	$t1,-4(SRCREG)
	lw	$t0,-8(SRCREG)
	lw	$v1,-12(SRCREG)
	lw	$t3,-16(SRCREG)
	addiu	DSTREG,32
	sw	$t1,-4(DSTREG)
	sw	$t0,-8(DSTREG)
	sw	$v1,-12(DSTREG)
	bne	SRCREG,$a3,1b
	sw	$t3,-16(DSTREG)		# (delay slot) last store of the pass

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	$t2,SIZEREG,3		# t2 = byte count mod 4
	subu	$t2,SIZEREG,$t2		# t2 = number of words to copy * 4
	beq	$t2,$zero,3f
	addu	$t0,SRCREG,$t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,$t2	# bytes left for the byte loop
1:
	lw	$t3,0(SRCREG)
	addiu	SRCREG,4
	sw	$t3,0(DSTREG)
	bne	SRCREG,$t0,1b
	addiu	DSTREG,4		# (delay slot)

	/*
	 * Copy the remaining SIZEREG bytes one at a time.  This is also the
	 * whole copy when the destination is not word aligned, so the loop
	 * test must treat the count as unsigned: it runs until the count
	 * reaches zero rather than using a signed greater-than-zero test.
	 */
3:	# bytecopy
	beq	SIZEREG,$zero,4f	# nothing left to do?
	nop
1:
	lb	$t3,0(SRCREG)
	addiu	SRCREG,1
	sb	$t3,0(DSTREG)
	addiu	SIZEREG,-1
	bne	SIZEREG,$zero,1b
	addiu	DSTREG,1		# (delay slot)

4:	# copydone
	j	$ra
	nop

	/*
	 * Copy from unaligned source to aligned dest.
	 * Each word is assembled with an LWHI/LWLO pair addressed at its
	 * first and last byte and then stored with an ordinary sw.
	 * NOTE(review): LWHI and LWLO are not defined in this part of the
	 * file; presumably they expand to lwl/lwr in the order required by
	 * the target byte order -- confirm against the build.
	 * Pointers are advanced with addiu: addi would raise an overflow
	 * trap if an address stepped across the signed 32-bit boundary.
	 */
5:	# destaligned
	andi	$t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	$a3,SIZEREG,$t0		# number of bytes in whole words
	beq	$a3,$zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,$t0		# this many to do after we are done
	addu	$a3,SRCREG,$a3		# stop point

1:
	LWHI	$t3,0(SRCREG)
	LWLO	$t3,3(SRCREG)
	addiu	SRCREG,4
	sw	$t3,0(DSTREG)
	bne	SRCREG,$a3,1b
	addiu	DSTREG,4		# (delay slot)

	j	3b			# finish the tail with the byte loop
	nop

/*
 * Backward (descending address) copy, mirroring the forward path.
 * Entered through label 6 when the source address is below the
 * destination address.  Both pointers are first moved to one byte past
 * the end of their buffers; every loop then reads and writes through
 * negative offsets and steps the pointers downward.
 *
 * As in the rest of the routine, .set noreorder is in effect: the
 * instruction after each branch or jump is its delay slot.
 */
6:	# backcopy -- based on above
	addu	SRCREG,SIZEREG		# src = one past the last source byte
	addu	DSTREG,SIZEREG		# dst = one past the last dest byte
	andi	$t1,DSTREG,3		# t1 = low two bits of dest end
	bne	$t1,$zero,3f		# dest end not word aligned: byte copy
	andi	$t0,SRCREG,3		# (delay slot) t0 = low two bits of src end
	bne	$t0,$zero,5f		# src end not word aligned

	/*
	 * Backward aligned->aligned copy, 8*4 bytes at a time.
	 * The li below doubles as the delay slot of the branch above.
	 */
	li	$at,-32
	and	$t0,SIZEREG,$at		# count truncated to multiple of 32
	beq	$t0,$zero,2f		# any work to do?
	subu	SIZEREG,$t0		# (delay slot) bytes left after fast loop
	subu	$a3,SRCREG,$t0		# run fast loop down to this address

	/*
	 * loop body: each pass moves 32 bytes as two groups of four words,
	 * highest addresses first.  The pointers are lowered in the middle,
	 * so the second group is addressed with non-negative offsets.
	 */
1:	# cp
	lw	$t3,-16(SRCREG)
	lw	$v1,-12(SRCREG)
	lw	$t0,-8(SRCREG)
	lw	$t1,-4(SRCREG)
	addiu	SRCREG,-32
	sw	$t3,-16(DSTREG)
	sw	$v1,-12(DSTREG)
	sw	$t0,-8(DSTREG)
	sw	$t1,-4(DSTREG)
	lw	$t1,12(SRCREG)
	lw	$t0,8(SRCREG)
	lw	$v1,4(SRCREG)
	lw	$t3,0(SRCREG)
	addiu	DSTREG,-32
	sw	$t1,12(DSTREG)
	sw	$t0,8(DSTREG)
	sw	$v1,4(DSTREG)
	bne	SRCREG,$a3,1b
	sw	$t3,0(DSTREG)		# (delay slot) last store of the pass

	/*
	 * Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	$t2,SIZEREG,3		# t2 = byte count mod 4
	subu	$t2,SIZEREG,$t2		# t2 = number of words to copy * 4
	beq	$t2,$zero,3f
	subu	$t0,SRCREG,$t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,$t2	# bytes left for the byte loop
1:
	lw	$t3,-4(SRCREG)
	addiu	SRCREG,-4
	sw	$t3,-4(DSTREG)
	bne	SRCREG,$t0,1b
	addiu	DSTREG,-4		# (delay slot)

	/*
	 * Copy the remaining SIZEREG bytes one at a time.  This is also the
	 * whole copy when the destination end is not word aligned, so the
	 * loop test must treat the count as unsigned: it runs until the
	 * count reaches zero rather than using a signed greater-than-zero
	 * test.
	 */
3:	# bytecopy
	beq	SIZEREG,$zero,4f	# nothing left to do?
	nop
1:
	lb	$t3,-1(SRCREG)
	addiu	SRCREG,-1
	sb	$t3,-1(DSTREG)
	addiu	SIZEREG,-1
	bne	SIZEREG,$zero,1b
	addiu	DSTREG,-1		# (delay slot)

4:	# copydone
	j	$ra
	nop

	/*
	 * Copy from unaligned source to aligned dest.
	 * Each word is assembled with an LWHI/LWLO pair addressed at its
	 * first and last byte and then stored with an ordinary sw.
	 * NOTE(review): LWHI and LWLO are not defined in this part of the
	 * file; presumably they expand to lwl/lwr in the order required by
	 * the target byte order -- confirm against the build.
	 */
5:	# destaligned
	andi	$t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	$a3,SIZEREG,$t0		# number of bytes in whole words
	beq	$a3,$zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,$t0		# this many to do after we are done
	subu	$a3,SRCREG,$a3		# stop point

1:
	LWHI	$t3,-4(SRCREG)
	LWLO	$t3,-1(SRCREG)
	addiu	SRCREG,-4
	sw	$t3,-4(DSTREG)
	bne	SRCREG,$a3,1b
	addiu	DSTREG,-4		# (delay slot)

	j	3b			# finish the tail with the byte loop
	nop

	.set	reorder
	.set	at
	/* Record the extent of the routine to go with its .type directive. */
	.size	FUNCTION, .-FUNCTION
